Amiga Format CD 42

home *** CD-ROM | disk | FTP | other *** search

/ Amiga Format CD 42 / Amiga Format AFCD42 (Issue 126, Aug 1999).iso / -serious- / programming / other / jikes / src / scanner.cpp < prev next >

Wrap

C/C++ Source or Header | 1999-05-14 | 55KB | 1,607 lines

// $Id: scanner.cpp,v 1.5 1999/02/17 19:07:57 shields Exp $ // // This software is subject to the terms of the IBM Jikes Compiler // License Agreement available at the following URL: // http://www.ibm.com/research/jikes. // Copyright (C) 1996, 1998, International Business Machines Corporation // and others. All Rights Reserved. // You must accept the terms of that agreement to use this software. // #include "config.h" #include "scanner.h" #include "control.h" #include "error.h" int (*Scanner::scan_keyword[13]) (wchar_t *p1) = { ScanKeyword0, ScanKeyword0, ScanKeyword2, ScanKeyword3, ScanKeyword4, ScanKeyword5, ScanKeyword6, ScanKeyword7, ScanKeyword8, ScanKeyword9, ScanKeyword10, ScanKeyword0, ScanKeyword12 }; // // The constructor initializes all utility variables. // Scanner::Scanner(Control &control_) : control(control_) { // // If this assertion fails, the Token structure in stream.h must be redesigned !!! // assert(NUM_TERMINALS < 128); // // ------------------------------------------------------------------------------- // We are pulling this code out because we are tired of defending it. We // tought it was obvious that either $ should not have been used for compiler // generated variables or that users should not be allowed to use in variable names... // ------------------------------------------------------------------------------- // // For version 1.1 or above a $ may not be used as part of an identifier name // unless the user specifically requests that it be allowed. // // if (control.option.one_one && (! control.option.dollar)) // Code::SetBadCode(U_DOLLAR); // // // CLASSIFY_TOKEN is a mapping from each character into a // classification routine that is invoked when that character // is the first character encountered in a token. // for (int c = 0; c < 128; c++) { if (Code::IsAlpha(c)) classify_token[c] = &Scanner::ClassifyId; else if (Code::IsDigit(c)) classify_token[c] = &Scanner::ClassifyNumericLiteral; else classify_token[c] = &Scanner::ClassifyBadToken; } classify_token[128] = &Scanner::ClassifyNonAsciiUnicode; classify_token[U_a] = &Scanner::ClassifyIdOrKeyword; classify_token[U_b] = &Scanner::ClassifyIdOrKeyword; classify_token[U_c] = &Scanner::ClassifyIdOrKeyword; classify_token[U_d] = &Scanner::ClassifyIdOrKeyword; classify_token[U_e] = &Scanner::ClassifyIdOrKeyword; classify_token[U_f] = &Scanner::ClassifyIdOrKeyword; classify_token[U_g] = &Scanner::ClassifyIdOrKeyword; classify_token[U_i] = &Scanner::ClassifyIdOrKeyword; classify_token[U_l] = &Scanner::ClassifyIdOrKeyword; classify_token[U_n] = &Scanner::ClassifyIdOrKeyword; classify_token[U_p] = &Scanner::ClassifyIdOrKeyword; classify_token[U_r] = &Scanner::ClassifyIdOrKeyword; classify_token[U_s] = &Scanner::ClassifyIdOrKeyword; classify_token[U_t] = &Scanner::ClassifyIdOrKeyword; classify_token[U_v] = &Scanner::ClassifyIdOrKeyword; classify_token[U_w] = &Scanner::ClassifyIdOrKeyword; classify_token[U_SINGLE_QUOTE] = &Scanner::ClassifyCharLiteral; classify_token[U_DOUBLE_QUOTE] = &Scanner::ClassifyStringLiteral; classify_token[U_PLUS] = &Scanner::ClassifyPlus; classify_token[U_MINUS] = &Scanner::ClassifyMinus; classify_token[U_EXCLAMATION] = &Scanner::ClassifyNot; classify_token[U_PERCENT] = &Scanner::ClassifyMod; classify_token[U_CARET] = &Scanner::ClassifyXor; classify_token[U_AMPERSAND] = &Scanner::ClassifyAnd; classify_token[U_STAR] = &Scanner::ClassifyStar; classify_token[U_BAR] = &Scanner::ClassifyOr; classify_token[U_TILDE] = &Scanner::ClassifyComplement; classify_token[U_SLASH] = &Scanner::ClassifySlash; classify_token[U_GREATER] = &Scanner::ClassifyGreater; classify_token[U_LESS] = &Scanner::ClassifyLess; classify_token[U_LEFT_PARENTHESIS] = &Scanner::ClassifyLparen; classify_token[U_RIGHT_PARENTHESIS] = &Scanner::ClassifyRparen; classify_token[U_LEFT_BRACE] = &Scanner::ClassifyLbrace; classify_token[U_RIGHT_BRACE] = &Scanner::ClassifyRbrace; classify_token[U_LEFT_BRACKET] = &Scanner::ClassifyLbracket; classify_token[U_RIGHT_BRACKET] = &Scanner::ClassifyRbracket; classify_token[U_SEMICOLON] = &Scanner::ClassifySemicolon; classify_token[U_QUESTION] = &Scanner::ClassifyQuestion; classify_token[U_COLON] = &Scanner::ClassifyColon; classify_token[U_COMMA] = &Scanner::ClassifyComma; classify_token[U_DOT] = &Scanner::ClassifyPeriod; classify_token[U_EQUAL] = &Scanner::ClassifyEqual; return; } // // Associate a lexical stream with this file // void Scanner::Initialize(FileSymbol *file_symbol) { lex = new LexStream(control, file_symbol); lex -> Reset(); LexStream::Token *current_token = &(lex -> token_stream.Next()); // add 0th token ! current_token -> SetKind(0); current_token -> SetLocation(0); current_token -> SetSymbol(NULL); if (control.option.comments) { LexStream::Comment *current_comment = &(lex -> comment_stream.Next()); // add 0th comment ! current_comment -> string = NULL; current_comment -> length = 0; current_comment -> previous_token = -1; // No token precedes this comment current_comment -> location = 0; } lex -> line_location.Next() = 0; // mark starting location of line # 0 return; } // // This is one of the main entry point for the Java lexical analyser. // Its input is the name of a regular text file. Its output is a stream // of tokens. // void Scanner::SetUp(FileSymbol *file_symbol) { Initialize(file_symbol); lex -> CompressSpace(); file_symbol -> lex_stream = lex; return; } // // This is one of the main entry point for the Java lexical analyser. // Its input is the name of a regular text file. Its output is a stream // of tokens. // void Scanner::Scan(FileSymbol *file_symbol) { Initialize(file_symbol); lex -> ReadInput(); cursor = lex -> InputBuffer(); if (cursor) { Scan(); lex -> CompressSpace(); // // // if (control.option.dump_errors) { lex -> SortMessages(); for (int i = 0; i < lex -> bad_tokens.Length(); i++) lex -> PrintEmacsMessage(i); cout.flush(); } lex -> DestroyInput(); // get rid of input buffer } else { delete lex; lex = NULL; } file_symbol -> lex_stream = lex; return; } // // Scan the InputBuffer() and process all tokens and comments. // void Scanner::Scan() { wchar_t *input_buffer_tail = &cursor[lex -> InputBufferLength()]; // // CURSOR is assumed to point to the next character to be scanned. // Using CURSOR,we jump to the proper classification function // which scans and classifies the token and returns the location of // the character immediately following it. // do { SkipSpaces(); (this ->* classify_token[*cursor < 128 ? *cursor : 128])(); } while (cursor < input_buffer_tail); // // Add a a gate after the last line. // lex -> line_location.Next() = input_buffer_tail - lex -> InputBuffer(); // // If the brace_stack is not empty, then there are unmatched left // braces in the input. Each unmatched left brace should point to // the EOF token as a substitute for a matching right brace. // for (LexStream::TokenIndex left_brace = brace_stack.Top(); left_brace; left_brace = brace_stack.Top()) { lex -> token_stream[left_brace].SetRightBrace(lex -> token_stream.Length() - 1); brace_stack.Pop(); } return; } // // CURSOR points to the s